/****************************************************************************************************
* Title: Workhorses of Opportunity, Howard and Weinstein
*
* Inputs:
*   - closeasylums.dta
*   - county_outcomes_dta.dta
*
* Outputs:
*   - Distributional effect graphs comparing Normal School vs. Close Asylum counties
*     saved to close_vs_far_`xxxxx'.pdf for each outcome
*
* Description:
*   This script evaluates the effects of historical Normal Schools and Close Asylums on intergenerational
*   outcomes across the income distribution. It estimates effects separately for each parental income 
*   percentile and compares counties with normal schools and close asylums to others.
****************************************************************************************************/

* Load close asylum data and define Normal/Close Asylum indicators
clear
use "closeasylums"
rename hasnormalschool hasnormal
replace closeasylum = 0 if hasnormal
tempfile normal
save `normal'

* Prepare environment
clear
clear matrix
clear mata
set maxvar 15000

* Load outcome data and merge with institution indicators
use county_outcomes_dta
gen cty_fips = state * 1000 + county
rename state statefips
rename county countychetty
merge 1:m cty_fips using `normal', keep(3)

* Fill in missing female teen birth variables
foreach percentile in p1 p25 p50 p75 p100 {
    gen teenbrth_pooled_pooled_`percentile' = teenbrth_pooled_female_`percentile'
}

* Keep relevant variables and drop summary stats
keep has_dad_pooled_pooled_* has_mom_pooled_pooled_* coll_pooled_pooled_* comcoll_pooled_pooled_* ///
     somecoll_pooled_pooled_* hs_pooled_pooled_* married_pooled_pooled_* teenbrth_pooled_pooled_* ///
     jail_pooled_pooled_* staycz_pooled_pooled_* stayhome_pooled_pooled_* kfr_pooled_pooled_* ///
     working_pooled_pooled_* kir_pooled_pooled_* cty_fips hasnormal statefips closeasylum
drop *_mean *_n

* Reshape to long for percentile-by-percentile analysis
reshape long has_dad_pooled_pooled_ has_mom_pooled_pooled_ coll_pooled_pooled_ comcoll_pooled_pooled_ ///
             somecoll_pooled_pooled_ hs_pooled_pooled_ married_pooled_pooled_ teenbrth_pooled_pooled_ ///
             jail_pooled_pooled_ staycz_pooled_pooled_ stayhome_pooled_pooled_ kfr_pooled_pooled_ ///
             working_pooled_pooled_ kir_pooled_pooled_, i(cty_fips) j(percentile) string

* PCA: combine related outcome measures into principal component
pca coll_pooled_pooled_ comcoll_pooled_pooled_ somecoll_pooled_pooled_ hs_pooled_pooled_ ///
    married_pooled_pooled_ teenbrth_pooled_pooled jail_pooled_pooled_ staycz_pooled_pooled_ ///
    stayhome_pooled_pooled_ kfr_pooled_pooled_ working_pooled_pooled_ kir_pooled_pooled_
predict pr_comp_pooled_pooled_

* Reshape back to wide format
reshape wide has_dad_pooled_pooled_ has_mom_pooled_pooled_ coll_pooled_pooled_ comcoll_pooled_pooled_ ///
             somecoll_pooled_pooled_ hs_pooled_pooled_ married_pooled_pooled_ teenbrth_pooled_pooled ///
             jail_pooled_pooled_ staycz_pooled_pooled_ stayhome_pooled_pooled_ kfr_pooled_pooled_ ///
             working_pooled_pooled_ kir_pooled_pooled_ pr_comp_pooled_pooled, i(cty_fips) j(percentile) string

* Generate identifiers and replicate observations across percentiles
gen obsnum = _n
expand 5
bys obsnum: gen obscount = _n
gen percentile = "p1" if obscount == 1
replace percentile = "p25" if obscount == 2
replace percentile = "p50" if obscount == 3
replace percentile = "p75" if obscount == 4
replace percentile = "p100" if obscount == 5
encode percentile, gen(percentilecode)
gen percentile_num = (obscount * 25 - 25 + 1 * (obscount == 1) - 50) / 100

* Loop through each outcome and run regressions by percentile
gen outcome = .
gen outcome0 = .

foreach xxxxx in has_dad has_mom hs coll comcoll somecoll married teenbrth jail ///
                 staycz stayhome working kfr kir pr_comp {

    * Assign percentile-specific values
    foreach percentile in p1 p25 p50 p75 p100 {
        replace outcome = `xxxxx'_pooled_pooled_`percentile' if percentile == "`percentile'"
    }

    * Scale rates appropriately
    replace outcome = outcome * 100 if inlist("`xxxxx'", "kfr", "kir")

    * Estimate normal effect (vs. far asylum)
    reghdfe outcome i.percentilecode#1.hasnormal if hasnormal == 1 | closeasylum == 0, ///
        absorb(statefips#percentilecode) cluster(statefips) nocon
    est sto `xxxxx'

    preserve
        parmest, fast
        gen obscount = substr(parm, 1, 1)
        gen percentile = 1 if obscount == "1"
        replace percentile = 25 if obscount == "3"
        replace percentile = 50 if obscount == "4"
        replace percentile = 75 if obscount == "5"
        replace percentile = 100 if obscount == "2"
        gen institute = "Normal"
        tempfile results
        save `results', replace
    restore

    * Estimate close asylum effect (vs. far asylum)
    reghdfe outcome i.percentilecode#1.closeasylum if hasnormal == 0 & (closeasylum == 1 | closeasylum == 0), ///
        absorb(statefips#percentilecode) cluster(statefips) nocon
    est sto `xxxxx'

    preserve
        parmest, fast
        gen obscount = substr(parm, 1, 1)
        gen percentile = 1 if obscount == "1"
        replace percentile = 25 if obscount == "3"
        replace percentile = 50 if obscount == "4"
        replace percentile = 75 if obscount == "5"
        replace percentile = 100 if obscount == "2"
        gen institute = "Close Asylum"
        append using `results'
        tempfile results2
        save `results2', replace
    restore

    * Merge with asylum county means and plot
    preserve
        drop if hasnormal | closeasylum
        gen percentile2 = .
        replace percentile2 = 1 if percentile == "p1"
        replace percentile2 = 25 if percentile == "p25"
        replace percentile2 = 50 if percentile == "p50"
        replace percentile2 = 75 if percentile == "p75"
        replace percentile2 = 100 if percentile == "p100"
        collapse outcome, by(percentile2)
        rename percentile2 percentile
        merge 1:m percentile using `results2'
        gen normal = substr(parm, -6, 6) == "normal"
        replace percentile = percentile - 1 if normal
        gen min90 = -invttail(dof, .05) * stderr + estimate
        gen max90 = invttail(dof, .05) * stderr + estimate
        sort percentile

		
        * Generate figure
 
		
		twoway rspike min95 max95 percentile if institute=="Normal", color(dkgreen) || ///
			rspike min95 max95 percentile if  institute=="Close Asylum", color(maroon) || ///
			rcap min90 max90 percentile if  institute=="Normal", color(dkgreen) || ///
			rcap min90 max90 percentile if institute=="Close Asylum", color(maroon) || ///
			scatter estimate percentile if  institute=="Normal", mcolor(dkgreen) || ///
			scatter estimate percentile if institute=="Close Asylum", mcolor(maroon) msym(Sh)  yline(0, lpattern(dash) lcolor(gs8))  ytitle("Effect of Normal/Close Asylum County (Spikes)")  xtitle("Parents' Income Percentile") xlabel(0 25 50 75 100) || ///
			line outcome percentile if normal, lcolor(orange_red) lpattern(dash) yaxis(2) ytitle("Average Value in Asylum County (Dashed)", axis(2)) name(`xxxxx'_average, replace) legend(order(5 6) label(5 "Normal") label(6 "Close Asylums"))
		

        graph export "close_vs_far_`xxxxx'.pdf", as(pdf) replace
    restore
}
